The
decorrelation
# Run IDeA on the raw data set. `thro` supplies the `thr` argument and
# verbose = TRUE makes the call print its per-iteration progress log.
DEdataframe <- IDeA(
  dataframe,
  thr = thro,
  verbose = TRUE
)
#>
#> Included: 725 , Uni p: 0.01309165 , Uncorrelated Base: 29 , Outcome-Driven Size: 0 , Base Size: 29
#>
#>
1 <R=1.000,r=0.975,N= 185>, Top: 31( 29 )[ 1 : 31 Fa= 30 : 0.975 ]( 30 , 78 , 0 ),<|>Tot Used: 108 , Added: 78 , Zero Std: 0 , Max Cor: 1.000
#>
2 <R=1.000,r=0.975,N= 185>, Top: 8( 26 )[ 1 : 8 Fa= 38 : 0.975 ]( 8 , 51 , 30 ),<|>Tot Used: 153 , Added: 51 , Zero Std: 0 , Max Cor: 1.000
#>
3 <R=1.000,r=0.975,N= 185>, Top: 7( 15 )[ 1 : 7 Fa= 44 : 0.975 ]( 7 , 30 , 38 ),<|>Tot Used: 178 , Added: 30 , Zero Std: 0 , Max Cor: 0.999
#>
4 <R=0.999,r=0.975,N= 185>, Top: 5( 6 )[ 1 : 5 Fa= 48 : 0.975 ]( 4 , 24 , 44 ),<|>Tot Used: 181 , Added: 24 , Zero Std: 0 , Max Cor: 0.975
#>
5 <R=0.975,r=0.962,N= 185>, Top: 53( 1 )[ 1 : 53 Fa= 80 : 0.962 ]( 51 , 59 , 48 ),<|>Tot Used: 247 , Added: 59 , Zero Std: 0 , Max Cor: 0.974
#>
6 <R=0.974,r=0.962,N= 185>, Top: 4( 1 )[ 1 : 4 Fa= 83 : 0.962 ]( 4 , 4 , 80 ),<|>Tot Used: 251 , Added: 4 , Zero Std: 0 , Max Cor: 0.962
#>
7 <R=0.962,r=0.931,N= 222>, Top: 75[ 3 ]( 1 )=[ 2 : 75 Fa= 127 : 0.942 ]( 72 , 105 , 83 ),<|>Tot Used: 368 , Added: 105 , Zero Std: 0 , Max Cor: 0.994
#>
8 <R=0.994,r=0.947,N= 222>, Top: 9( 1 )[ 1 : 9 Fa= 135 : 0.947 ]( 9 , 9 , 127 ),<|>Tot Used: 383 , Added: 9 , Zero Std: 0 , Max Cor: 0.941
#>
9 <R=0.941,r=0.871,N= 302>, Top: 93( 1 )=[ 2 : 93 Fa= 182 : 0.911 ]( 87 , 139 , 135 ),<|>Tot Used: 523 , Added: 139 , Zero Std: 0 , Max Cor: 0.960
#>
10 <R=0.960,r=0.880,N= 302>, Top: 20( 1 )[ 1 : 20 Fa= 191 : 0.880 ]( 18 , 26 , 182 ),<|>Tot Used: 545 , Added: 26 , Zero Std: 0 , Max Cor: 0.915
#>
11 <R=0.915,r=0.807,N= 245>, Top: 73( 5 )[ 1 : 73 Fa= 219 : 0.807 ]( 71 , 121 , 191 ),<|>Tot Used: 593 , Added: 121 , Zero Std: 0 , Max Cor: 0.926
#>
12 <R=0.926,r=0.813,N= 245>, Top: 17( 3 )[ 1 : 17 Fa= 226 : 0.813 ]( 16 , 20 , 219 ),<|>Tot Used: 610 , Added: 20 , Zero Std: 0 , Max Cor: 0.851
#>
13 <R=0.851,r=0.800,N= 49>, Top: 23( 1 )[ 1 : 23 Fa= 234 : 0.800 ]( 23 , 26 , 226 ),<|>Tot Used: 626 , Added: 26 , Zero Std: 0 , Max Cor: 0.799
#>
14 <R=0.799,r=0.800,N= 49>
#>
[ 14 ], 0.7986822 Decor Dimension: 626 Nused: 626 . Cor to Base: 244 , ABase: 17 , Outcome Base: 0
#>
# Every column of the decorrelated frame except the outcome column.
isDecorFeature <- colnames(DEdataframe) != outcome
varlistc <- colnames(DEdataframe)[isDecorFeature]

# Sum of the per-column variances of the raw features.
rawFeatureVar <- apply(dataframe[, varlist], 2, var)
pander::pander(sum(rawFeatureVar))
7.73e+08
# Sum of the per-column variances of the decorrelated features.
decorFeatureVar <- apply(DEdataframe[, varlistc], 2, var)
pander::pander(sum(decorFeatureVar))
1.8e+08
# Entropy of all raw feature values pooled into one vector and passed
# through discretize() with 256 as its second argument.
rawBinCounts <- discretize(unlist(dataframe[, varlist]), 256)
pander::pander(entropy(rawBinCounts))
0.306
# Same entropy measure, computed on the pooled decorrelated feature values.
decorBinCounts <- discretize(unlist(DEdataframe[, varlistc]), 256)
pander::pander(entropy(decorBinCounts))
0.218
The decorrelation matrix
# Draw the sparsity pattern of the decorrelation matrix, unless the data set
# is flagged as large.
if (!largeSet) {
  par(cex = 0.6, cex.main = 0.85, cex.axis = 0.7)
  # The transformation matrix is stored on the IDeA output as an attribute.
  UPSTM <- attr(DEdataframe, "UPSTM")
  # Plot a 0/1 indicator of the non-zero coefficients rather than their values.
  gplots::heatmap.2(
    1.0 * (abs(UPSTM) > 0),
    trace = "none",
    # Spelled out in full: the original `mar =` only reached this argument
    # through partial argument matching.
    margins = c(5, 5),
    col = rev(heat.colors(5)),
    main = "Decorrelation matrix",
    cexRow = cexheat,
    cexCol = cexheat,
    srtCol = 45,
    srtRow = 45,
    key.title = NA,
    key.xlab = "|Beta|>0",
    xlab = "Output Feature",
    ylab = "Input Feature"
  )
  # Put back the graphics settings held in `op` (assigned outside this excerpt).
  par(op)
}

Univariate Analysis
Univariate
# Rank every raw feature against the outcome using AUC as the ranking test.
# The formula string is the outcome followed by "~1".
rawBaseFormula <- paste(outcome, "~1")
univarRAW <- uniRankVar(
  varlist,
  rawBaseFormula,
  outcome,
  dataframe,
  rankingTest = "AUC"
)
100 : V102_WL 200 : V288_WL 300 : V535_WL 400 : V635_WL 500 :
V37_NBI
600 : V137_NBI 700 : V470_NBI
# Rank every decorrelated feature against the outcome using AUC as the
# ranking test. The call mirrors the one made for the raw features; the
# stray trailing comma, which passed an extra empty argument, is removed.
univarDe <- uniRankVar(
  varlistc,
  paste(outcome, "~1"),
  outcome,
  DEdataframe,
  rankingTest = "AUC"
)
100 : La_V102_WL 200 : La_V288_WL 300 : La_V535_WL 400 : La_V635_WL
500 : La_V37_NBI
600 : La_V137_NBI 700 : La_V470_NBI
Final Table
# Columns of the univariate ranking that are reported in the tables.
univariate_columns <- c(
  "caseMean", "caseStd",
  "controlMean", "controlStd",
  "controlKSP", "ROCAUC"
)

## top variables
# Logical mask selecting the first `TopVariables` positions. seq_along() is
# used instead of 1:length() so an empty `varlist` gives an empty mask
# rather than the two-element sequence c(1, 0).
topvar <- seq_along(varlist) <= TopVariables
tableRaw <- univarRAW$orderframe[topvar, univariate_columns]
pander::pander(tableRaw)
| V172_WL |
3.55e+03 |
1.78e+03 |
1046.667 |
537.2409 |
0.718095 |
0.933 |
| V220_NBI |
2.01e+02 |
1.20e+02 |
51.524 |
27.8220 |
0.747592 |
0.929 |
| V220_WL |
1.96e+02 |
1.07e+02 |
52.381 |
42.7370 |
0.097268 |
0.927 |
| V477_NBI |
6.18e-02 |
2.98e-02 |
0.149 |
0.1717 |
0.000358 |
0.925 |
| V169_NBI |
1.26e+03 |
8.24e+02 |
346.619 |
198.5476 |
0.350000 |
0.920 |
| V196_NBI |
4.52e+02 |
2.51e+02 |
134.238 |
66.3226 |
0.410564 |
0.920 |
| V182_NBI |
3.44e+02 |
2.17e+02 |
95.190 |
48.8412 |
0.793090 |
0.915 |
| V470_NBI |
3.79e-01 |
1.34e-01 |
0.188 |
0.0682 |
0.948083 |
0.913 |
| V182_WL |
3.17e+02 |
1.69e+02 |
96.476 |
87.3691 |
0.142781 |
0.912 |
| V474_NBI |
3.40e+00 |
3.13e-01 |
2.680 |
0.5481 |
0.222068 |
0.912 |
# Names in the decorrelated ranking that contain the "La_" marker.
topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name, "La_")]

# Union of the first `TopVariables` ranked names and the first
# TopVariables/2 "La_" names. head() never reads past the end of a vector,
# so no NA names (and hence no all-NA table rows) appear when fewer entries
# exist than requested. It also avoids applying the logical `topvar` mask,
# which was built from `varlist`, to a vector of a different length.
topLAvar <- unique(c(
  head(univarDe$orderframe$Name, TopVariables),
  head(topLAvar, as.integer(TopVariables / 2))
))

# Rows are looked up by name in the ranking frame.
finalTable <- univarDe$orderframe[topLAvar, univariate_columns]
pander::pander(finalTable)
| V474_NBI |
3.40e+00 |
3.13e-01 |
2.68e+00 |
5.48e-01 |
0.2221 |
0.912 |
| V169_WL |
1.20e+03 |
6.66e+02 |
4.03e+02 |
4.20e+02 |
0.0543 |
0.897 |
| V474_WL |
3.19e+00 |
4.57e-01 |
2.36e+00 |
5.29e-01 |
0.9972 |
0.882 |
| V4_WL |
1.67e+03 |
9.90e+02 |
6.00e+02 |
4.77e+02 |
0.0868 |
0.874 |
| La_V69_WL |
1.03e-03 |
1.66e-03 |
-1.02e-03 |
1.56e-03 |
0.3074 |
0.872 |
| V473_NBI |
1.22e-01 |
4.19e-02 |
2.12e-01 |
1.67e-01 |
0.0188 |
0.865 |
| V485_WL |
3.14e+00 |
4.63e-01 |
2.44e+00 |
4.51e-01 |
0.5556 |
0.853 |
| V473_WL |
1.57e-01 |
5.21e-02 |
2.82e-01 |
1.38e-01 |
0.3081 |
0.850 |
| V198_NBI |
3.83e+02 |
2.17e+02 |
1.54e+02 |
9.18e+01 |
0.3333 |
0.844 |
| La_V200_NBI |
-1.08e+03 |
2.09e+03 |
1.03e+03 |
2.22e+03 |
0.0393 |
0.835 |
| La_V91_NBI |
6.01e-03 |
4.07e-03 |
1.88e-03 |
2.59e-03 |
0.9531 |
0.825 |
| La_V478_WL |
9.13e-01 |
2.30e-02 |
9.48e-01 |
3.09e-02 |
0.9838 |
0.824 |
| La_V296_NBI |
-3.21e+02 |
1.47e+03 |
3.76e+02 |
9.94e+02 |
0.0346 |
0.810 |
# Latent-variable coefficients and the "fscore" attribute stored on the
# IDeA output.
dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe, "fscore")

# Mean length of the coefficient entries, their count, and that count
# relative to ncol(dataframe) - 1 (presumably all columns but the outcome).
pander::pander(c(
  mean = mean(lengths(dc)),
  total = length(dc),
  fraction = length(dc) / (ncol(dataframe) - 1)
))
theCharformulas <- attr(dc, "LatentCharFormulas")

# Append only the raw top-ranked rows that are not already in the table.
# The previous filter compared the logical mask `topvar` with character
# names, so it never excluded anything and produced duplicated rows
# (e.g. "V474_NBI1") whose RAWAUC lookup then returned NA.
rawOnlyNames <- setdiff(rownames(tableRaw), rownames(finalTable))
finalTable <- rbind(finalTable, tableRaw[rawOnlyNames, univariate_columns])

# Strip the "La_" marker to recover the name used in the raw ranking.
orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez, "La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez, "ROCAUC"]

# Name-based lookups: rows without a matching entry get NA.
finalTable$DecorFormula <- theCharformulas[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]

Final_Columns <- c(
  "DecorFormula",
  "caseMean", "caseStd",
  "controlMean", "controlStd",
  "controlKSP", "ROCAUC", "RAWAUC", "fscores"
)
# Highest ROCAUC first.
finalTable <- finalTable[order(-finalTable$ROCAUC), ]
pander::pander(finalTable[, Final_Columns])
| V172_WL |
NA |
3.55e+03 |
1.78e+03 |
1.05e+03 |
5.37e+02 |
0.718095 |
0.933 |
0.933 |
NA |
| V220_NBI |
NA |
2.01e+02 |
1.20e+02 |
5.15e+01 |
2.78e+01 |
0.747592 |
0.929 |
0.929 |
NA |
| V220_WL |
NA |
1.96e+02 |
1.07e+02 |
5.24e+01 |
4.27e+01 |
0.097268 |
0.927 |
0.927 |
NA |
| V477_NBI |
NA |
6.18e-02 |
2.98e-02 |
1.49e-01 |
1.72e-01 |
0.000358 |
0.925 |
0.925 |
NA |
| V169_NBI |
NA |
1.26e+03 |
8.24e+02 |
3.47e+02 |
1.99e+02 |
0.350000 |
0.920 |
0.920 |
NA |
| V196_NBI |
NA |
4.52e+02 |
2.51e+02 |
1.34e+02 |
6.63e+01 |
0.410564 |
0.920 |
0.920 |
NA |
| V182_NBI |
NA |
3.44e+02 |
2.17e+02 |
9.52e+01 |
4.88e+01 |
0.793090 |
0.915 |
0.915 |
NA |
| V470_NBI |
NA |
3.79e-01 |
1.34e-01 |
1.88e-01 |
6.82e-02 |
0.948083 |
0.913 |
0.913 |
NA |
| V474_NBI |
NA |
3.40e+00 |
3.13e-01 |
2.68e+00 |
5.48e-01 |
0.222068 |
0.912 |
0.912 |
NA |
| V182_WL |
NA |
3.17e+02 |
1.69e+02 |
9.65e+01 |
8.74e+01 |
0.142781 |
0.912 |
0.912 |
NA |
| V474_NBI1 |
NA |
3.40e+00 |
3.13e-01 |
2.68e+00 |
5.48e-01 |
0.222068 |
0.912 |
NA |
NA |
| V169_WL |
NA |
1.20e+03 |
6.66e+02 |
4.03e+02 |
4.20e+02 |
0.054330 |
0.897 |
0.897 |
9 |
| V474_WL |
NA |
3.19e+00 |
4.57e-01 |
2.36e+00 |
5.29e-01 |
0.997159 |
0.882 |
0.882 |
NA |
| V4_WL |
NA |
1.67e+03 |
9.90e+02 |
6.00e+02 |
4.77e+02 |
0.086777 |
0.874 |
0.874 |
4 |
| La_V69_WL |
- (1.863)V47_WL + V69_WL |
1.03e-03 |
1.66e-03 |
-1.02e-03 |
1.56e-03 |
0.307376 |
0.872 |
0.617 |
-1 |
| V473_NBI |
NA |
1.22e-01 |
4.19e-02 |
2.12e-01 |
1.67e-01 |
0.018789 |
0.865 |
0.865 |
2 |
| V485_WL |
NA |
3.14e+00 |
4.63e-01 |
2.44e+00 |
4.51e-01 |
0.555561 |
0.853 |
0.853 |
1 |
| V473_WL |
NA |
1.57e-01 |
5.21e-02 |
2.82e-01 |
1.38e-01 |
0.308090 |
0.850 |
0.850 |
1 |
| V198_NBI |
NA |
3.83e+02 |
2.17e+02 |
1.54e+02 |
9.18e+01 |
0.333253 |
0.844 |
0.844 |
13 |
| La_V200_NBI |
- (0.862)V184_NBI + V200_NBI |
-1.08e+03 |
2.09e+03 |
1.03e+03 |
2.22e+03 |
0.039308 |
0.835 |
0.763 |
-1 |
| La_V91_NBI |
- (2.866)V47_NBI + V91_NBI |
6.01e-03 |
4.07e-03 |
1.88e-03 |
2.59e-03 |
0.953148 |
0.825 |
0.578 |
1 |
| La_V478_WL |
+ (0.130)V475_WL + V478_WL |
9.13e-01 |
2.30e-02 |
9.48e-01 |
3.09e-02 |
0.983768 |
0.824 |
0.788 |
1 |
| La_V296_NBI |
- (13.104)V198_NBI + V296_NBI |
-3.21e+02 |
1.47e+03 |
3.76e+02 |
9.94e+02 |
0.034643 |
0.810 |
0.758 |
-1 |
Comparing IDeA vs PCA vs EFA
PCA
# All column names except the outcome.
featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]

# Principal components of the columns selected by `iscontinous`, centred and
# scaled, then projected back onto the same rows.
pc <- prcomp(dataframe[, iscontinous], center = TRUE, scale. = TRUE)
predPCA <- predict(pc, dataframe[, iscontinous])

# Component scores side by side with the columns not selected by
# `iscontinous`. Column names are set explicitly after the cbind().
PCAdataframe <- as.data.frame(cbind(predPCA, dataframe[, !iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA), colnames(dataframe)[!iscontinous])
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)
#pander::pander(pc$rotation)

# Absolute correlation between all non-outcome columns of the PCA frame.
PCACor <- cor(PCAdataframe[, colnames(PCAdataframe) != outcome])
gplots::heatmap.2(
  abs(PCACor),
  trace = "none",
  # scale = "row",
  # Spelled out in full: the original `mar =` only reached this argument
  # through partial argument matching.
  margins = c(5, 5),
  col = rev(heat.colors(5)),
  main = "PCA Correlation",
  cexRow = 0.5,
  cexCol = 0.5,
  srtCol = 45,
  srtRow = -45,
  key.title = NA,
  key.xlab = "Pearson Correlation",
  xlab = "Feature",
  ylab = "Feature"
)

EFA
# Default to the scaled data; it is replaced by factor scores only when the
# factor analysis below is actually run.
EFAdataframe <- dataframeScaled
if (length(iscontinous) < 2000) {
  # Number of factors: half the number of principal components, capped by
  # the data dimensions and never below 2. The half is floored so that an
  # odd component count does not hand a fractional `nfactors` to fa().
  topred <- min(
    length(iscontinous),
    nrow(dataframeScaled),
    floor(ncol(predPCA) / 2)
  )
  topred <- max(topred, 2)

  # EFA analysis
  uls <- fa(
    dataframeScaled[, iscontinous],
    nfactors = topred,
    rotate = "varimax",
    warnings = FALSE
  )
  predEFA <- predict(uls, dataframeScaled[, iscontinous])

  # Factor scores side by side with the columns not selected by
  # `iscontinous`. Column names are set explicitly after the cbind().
  EFAdataframe <- as.data.frame(cbind(predEFA, dataframeScaled[, !iscontinous]))
  colnames(EFAdataframe) <- c(colnames(predEFA), colnames(dataframeScaled)[!iscontinous])

  # Absolute correlation between all non-outcome columns of the EFA frame.
  EFACor <- cor(EFAdataframe[, colnames(EFAdataframe) != outcome])
  gplots::heatmap.2(
    abs(EFACor),
    trace = "none",
    # scale = "row",
    # Spelled out in full: the original `mar =` only reached this argument
    # through partial argument matching.
    margins = c(5, 5),
    col = rev(heat.colors(5)),
    main = "EFA Correlation",
    cexRow = 0.5,
    cexCol = 0.5,
    srtCol = 45,
    srtRow = -45,
    key.title = NA,
    key.xlab = "Pearson Correlation",
    xlab = "Feature",
    ylab = "Feature"
  )
}

Effect on CART modeling
# Fit a depth-3 rpart tree on the raw features and report how well it
# reproduces the outcome on the same rows it was fitted on.
par(op)
par(xpd = TRUE)
dataframe[, outcome] <- factor(dataframe[, outcome])
rawmodel <- rpart(
  paste(outcome, "~."),
  dataframe,
  control = rpart.control(maxdepth = 3)
)
pr <- predict(rawmodel, dataframe, type = "class")
if (length(unique(pr)) > 1) {
  # More than one class is predicted: draw the tree and compute diagnostics.
  plot(rawmodel, main = "Raw", branch = 0.5, uniform = TRUE, compress = TRUE, margin = 0.1)
  text(rawmodel, use.n = TRUE, cex = 0.75)
  ptab <- epiR::epi.tests(table(pr == 0, dataframe[, outcome] == 0))
} else {
  # Single predicted class: all-NA placeholder so the summary below still prints.
  ptab <- list(er = "Error", detail = matrix(nrow = 6, ncol = 1))
}

# Confusion table (outcome by prediction), then rows 5, 3, 4 and 6 of the
# diagnostic detail table.
pander::pander(table(dataframe[, outcome], pr))
pander::pander(ptab$detail[c(5, 3, 4, 6), ])
| 5 |
diag.ac |
0.908 |
0.819 |
0.962 |
| 3 |
se |
0.945 |
0.849 |
0.989 |
| 4 |
sp |
0.810 |
0.581 |
0.946 |
| 6 |
diag.or |
73.667 |
14.963 |
362.674 |
# Fit a depth-3 rpart tree on the IDeA-decorrelated features and report how
# well it reproduces the outcome on the same rows it was fitted on.
par(op)
par(xpd = TRUE)
DEdataframe[, outcome] <- factor(DEdataframe[, outcome])
IDeAmodel <- rpart(
  paste(outcome, "~."),
  DEdataframe,
  control = rpart.control(maxdepth = 3)
)
pr <- predict(IDeAmodel, DEdataframe, type = "class")
if (length(unique(pr)) > 1) {
  # More than one class is predicted: draw the tree and compute diagnostics.
  plot(IDeAmodel, main = "IDeA", branch = 0.5, uniform = TRUE, compress = TRUE, margin = 0.1)
  text(IDeAmodel, use.n = TRUE, cex = 0.75)
  ptab <- epiR::epi.tests(table(pr == 0, DEdataframe[, outcome] == 0))
} else {
  # Single predicted class: all-NA placeholder so the summary below still prints.
  ptab <- list(er = "Error", detail = matrix(nrow = 6, ncol = 1))
}

# Confusion table (outcome by prediction), then rows 5, 3, 4 and 6 of the
# diagnostic detail table.
pander::pander(table(DEdataframe[, outcome], pr))
pander::pander(ptab$detail[c(5, 3, 4, 6), ])
| 5 |
diag.ac |
0.947 |
0.871 |
0.985 |
| 3 |
se |
0.982 |
0.903 |
1.000 |
| 4 |
sp |
0.857 |
0.637 |
0.970 |
| 6 |
diag.or |
324.000 |
31.676 |
3314.066 |
# Fit a depth-3 rpart tree on the PCA scores and report how well it
# reproduces the outcome on the same rows it was fitted on.
par(op)
par(xpd = TRUE)
PCAdataframe[, outcome] <- factor(PCAdataframe[, outcome])
PCAmodel <- rpart(
  paste(outcome, "~."),
  PCAdataframe,
  control = rpart.control(maxdepth = 3)
)
pr <- predict(PCAmodel, PCAdataframe, type = "class")
if (length(unique(pr)) > 1) {
  # More than one class is predicted: draw the tree and compute diagnostics.
  plot(PCAmodel, main = "PCA", branch = 0.5, uniform = TRUE, compress = TRUE, margin = 0.1)
  text(PCAmodel, use.n = TRUE, cex = 0.75)
  ptab <- epiR::epi.tests(table(pr == 0, PCAdataframe[, outcome] == 0))
} else {
  # Single predicted class: all-NA placeholder so the summary below still prints.
  ptab <- list(er = "Error", detail = matrix(nrow = 6, ncol = 1))
}

# Confusion table (outcome by prediction), then rows 5, 3, 4 and 6 of the
# diagnostic detail table.
pander::pander(table(PCAdataframe[, outcome], pr))
pander::pander(ptab$detail[c(5, 3, 4, 6), ])
| 5 |
diag.ac |
0.895 |
0.803 |
0.953 |
| 3 |
se |
0.964 |
0.875 |
0.996 |
| 4 |
sp |
0.714 |
0.478 |
0.887 |
| 6 |
diag.or |
66.250 |
12.104 |
362.601 |
# Restore the graphics settings held in `op` (assigned outside this excerpt).
par(op)
EFA
# Fit a depth-3 rpart tree on the EFA frame and report how well it
# reproduces the outcome on the same rows it was fitted on.
EFAdataframe[, outcome] <- factor(EFAdataframe[, outcome])
EFAmodel <- rpart(
  paste(outcome, "~."),
  EFAdataframe,
  control = rpart.control(maxdepth = 3)
)
pr <- predict(EFAmodel, EFAdataframe, type = "class")
if (length(unique(pr)) > 1) {
  # More than one class is predicted: draw the tree and compute diagnostics.
  plot(EFAmodel, main = "EFA", branch = 0.5, uniform = TRUE, compress = TRUE, margin = 0.1)
  text(EFAmodel, use.n = TRUE, cex = 0.75)
  ptab <- epiR::epi.tests(table(pr == 0, EFAdataframe[, outcome] == 0))
} else {
  # Single predicted class: all-NA placeholder so the summary below still prints.
  ptab <- list(er = "Error", detail = matrix(nrow = 6, ncol = 1))
}

# Confusion table (outcome by prediction), then rows 5, 3, 4 and 6 of the
# diagnostic detail table.
pander::pander(table(EFAdataframe[, outcome], pr))
pander::pander(ptab$detail[c(5, 3, 4, 6), ])
| 5 |
diag.ac |
0.934 |
0.853 |
0.978 |
| 3 |
se |
1.000 |
0.935 |
1.000 |
| 4 |
sp |
0.762 |
0.528 |
0.918 |
| 6 |
diag.or |
Inf |
NA |
Inf |
# Restore the graphics settings held in `op` (assigned outside this excerpt).
par(op)